///////////////////////////////////////
//Data set for measures of labor input
///////////////////////////////////////

// Load the individual-level panel, then attach person attributes and
// firm-year variables. Rows that exist only in a using file are discarded,
// so the row set of the individual panel is preserved.
clear all
use "[Folder]\Panel_Ind.dta"

// Person-level attributes (one row per LopNr_PersonNr in the using file).
merge m:1 LopNr_PersonNr using "[Folder]\TimeInv_Ind.dta", keepusing(FodelseLandnamn InvAr)
drop if _merge==2
drop _merge

// Firm-year variables. Org_Sni2007 used to be listed twice in keepusing();
// it is listed once here.
merge m:1 LopNr_PeOrgNr year using "[Folder]\Panel_Firm.dta", ///
	keepusing(Ftg_Foradlingsvarde Ftg_Eget_kapital Ftg_Anlaggningstillgangar ///
	Org_Sni2007G Org_Sni2007 Ftg_Summa_bruttoinv)
drop if _merge==2
drop _merge

// FirmSize = number of panel rows with a non-missing firm id in each
// firm-year cell. The data are left sorted by firm id and year.
bysort LopNr_PeOrgNr year: egen FirmSize = count(LopNr_PeOrgNr)



// Income: per the original note KU1Ink is recorded in units of 100 kr.
// Rows without income are removed and the rest is divided by 10 so that the
// variable is expressed in units of 1000 kr.
drop if missing(KU1Ink)
replace KU1Ink = KU1Ink/10
// One indicator variable (y_1, y_2, ...) per year still present in the data.
tabulate year, generate(y_)
generate Age = year - Fodelsear


// Education level intended to resemble the isced11 categories, derived from
// the first two digits (Educ_d12) and the third digit (Educ_d3) of Sun2000niva.
// nsplit is a user-written command and must be installed.
nsplit Sun2000niva, digits(2 1) generate(Educ_d12 Educ_d3)

generate isced11 = .
replace isced11 = 0 if Educ_d12==0
replace isced11 = 1 if inlist(Educ_d12, 10, 99)
replace isced11 = 2 if inlist(Educ_d12, 20, 31)
replace isced11 = 3 if inlist(Educ_d12, 32, 33)
replace isced11 = 4 if Educ_d12==41
replace isced11 = 5 if Educ_d12==52
// Code 53 maps to level 6, except when the third digit is 5 (level 5).
replace isced11 = cond(Educ_d3==5, 5, 6) if Educ_d12==53
replace isced11 = 7 if inlist(Educ_d12, 54, 55)
replace isced11 = 8 if inlist(Educ_d12, 60, 62, 64)
// Codes not listed above leave isced11 missing.
tabulate isced11, generate(isced11_dummy)

// Years of education implied by the first two digits of Sun2000niva.
// Any code that matches none of the rules below keeps the default of 0.
nsplit Sun2000niva, digits(1 1 1) generate(SunD1 SunD2 SunD3)
generate Educ_Ar = 0
replace Educ_Ar = 3 if SunD1==9
replace Educ_Ar = 6 if SunD1==1
replace Educ_Ar = 9 if SunD1==2
// 31 -> 10, 32 -> 11, 33 -> 12
replace Educ_Ar = 9 + SunD2 if SunD1==3 & inlist(SunD2, 1, 2, 3)
replace Educ_Ar = 13 if SunD1==4 & SunD2==1
// 52 -> 14, 53 -> 15, 54 -> 16, 55 -> 17
replace Educ_Ar = 12 + SunD2 if SunD1==5 & inlist(SunD2, 2, 3, 4, 5)
// 60 and 62 -> 18, 64 -> 20
replace Educ_Ar = 18 if SunD1==6 & inlist(SunD2, 0, 2)
replace Educ_Ar = 20 if SunD1==6 & SunD2==4

// The digit helpers are only needed for the mapping above.
drop SunD1 SunD2 SunD3


// Experience measured as age minus years of education, plus its square
// (Exp_Sq) and cube (Exp_Qu).
generate Exp = Age - Educ_Ar
generate Exp_Sq = Exp^2
generate Exp_Qu = Exp^3

// Firm-size class indicators (0/1). With an integer head count the classes
// are 1-15, 16-50, 51-150, 151-500 and 501+. A missing FirmSize gives 0 in
// every class.
generate FirmSize1 = (FirmSize<16 & !missing(FirmSize))
generate FirmSize2 = (FirmSize>15 & FirmSize<51 & !missing(FirmSize))
generate FirmSize3 = (FirmSize>50 & FirmSize<151 & !missing(FirmSize))
generate FirmSize4 = (FirmSize>150 & FirmSize<501 & !missing(FirmSize))
generate FirmSize5 = (FirmSize>500 & !missing(FirmSize))

// FirmSizeInd is 1 for firms above the minimum size and missing otherwise.
// It matches the FirmSize* wildcard used later when counting missing values
// for the inclusion flag, so its threshold decides who enters that sample.
// The threshold was FirmSize>6, which excluded 6-employee firms even though
// the inclusion rule (FirmSize>5) and the firm-level filter (drop if
// FirmSize<6) both keep them. It is aligned to >5 here.
// NOTE(review): this changes sample membership for 6-employee firms -
// confirm that >5 is the intended cut-off.
generate FirmSizeInd = .
replace FirmSizeInd = 1 if FirmSize>5 & !missing(FirmSize)




// Sex indicator: code 1 (male, per the original comment) becomes 0 and
// code 2 becomes 1.
recode Kon (1=0) (2=1)

// ChildMig = 1 for persons with a birth-country code other than 107 whose
// InvAr is less than 17 years after their birth year. A missing InvAr
// yields 0 because missing compares as larger than any number.
generate ChildMig = (InvAr - Fodelsear < 17 & FodelseLandnamn!=107)

// Years since InvAr, floored at 0; max() also turns a missing difference
// into 0. Persons with birth-country code 107 are set to 0.
generate SweTime = max(year - InvAr, 0)
replace SweTime = 0 if FodelseLandnamn==107

// Foreign = 1 whenever the birth-country code differs from 107 (this
// includes a missing code).
generate Foreign = (FodelseLandnamn!=107)


// Log transforms of the firm-level variables (non-positive values become missing).
generate logCap = log(Ftg_Anlaggningstillgangar)
generate logFtg_Foradlingsvarde = log(Ftg_Foradlingsvarde)
generate logGrosInvTot = log(Ftg_Summa_bruttoinv)



// u = number of missing values among the year dummies, every variable whose
// name starts with FirmSize, the industry code and the three log variables.
egen u = rowmiss(y_* FirmSize* Org_Sni2007G logFtg_Foradlingsvarde logCap logGrosInvTot)

// FR_Incl flags rows with no missing values (u==0) and FirmSize above 5.
generate FR_Incl = (u==0 & FirmSize>5)

// Attach the tjomf / arbtim variables per person-firm-year; rows found only
// in the using file are discarded.
merge 1:1 LopNr_PersonNr LopNr_PeOrgNr year using ///
	"\\micro.intra\projekt\P0738$\P0738_Gem\P0738_AppAddLonStat.dta", ///
	keepusing(tjomf tjomf_tot arbtim arbtim_tot)
keep if _merge!=2
drop _merge


// Mean of tjomf within each birth country x year x sex x FR_Incl cell.
bysort FodelseLandnamn year Kon FR_Incl: egen tjomf_y_av = mean(tjomf)

// Express the cell means relative to the reference group: birth-country
// code 107, Kon==0, FR_Incl==1.
summarize tjomf_y_av if FodelseLandnamn==107 & FR_Incl==1 & Kon==0
scalar tjomf_107 = r(mean)
replace tjomf_y_av = tjomf_y_av/tjomf_107


// Cells with no tjomf information receive the average relative value of the
// non-107 rows with Kon==0 and FR_Incl==1.
summarize tjomf_y_av if FodelseLandnamn!=107 & FR_Incl==1 & Kon==0
scalar tjomf_av = r(mean)
replace tjomf_y_av = tjomf_av if missing(tjomf_y_av)


// Predicted efficiency units.
// The log-income regression is estimated on rows with birth-country code
// 107, FR_Incl==1 and Kon!=1. The linear prediction is then computed for
// every row and centred on its overall mean before exponentiating.
generate logKU1 = log(KU1Ink)

regress logKU1 Exp Exp_Sq Exp_Qu isced11_dummy* y_* if FodelseLandnamn==107 & FR_Incl==1 & Kon!=1
predict wage1, xb
summarize wage1
generate EffUnits = exp(wage1 - r(mean))

// Checkpoint that is re-read when the occupation crosswalks are attached.
save "[Folder]\Firm_Labor_Occ_Intermediate.dta", replace



// Attach occupation measures (iti nr_ca nr_ci rc rm nr_mp) via KU1Ssyk4,
// using one crosswalk file for years from 2014 on (ssyk12_isco08_up) and
// another for earlier years (ssyk96_isco08_up), then stack the two parts.

// Part 1: rows with year >= 2014 (a missing year also stays in this part).
drop if year<2014
merge m:1 KU1Ssyk4 using "[Data Folder]\ssyk12_isco08_up.dta", keepusing(iti nr_ca nr_ci rc rm nr_mp)
drop if _merge==2
drop _merge
// replace: without it a re-run stops here because the file already exists.
save "[Folder]\Firm_Labor_Occ_nr.dta", replace

// Part 2: rows with year < 2014.
// The filter used to be "drop if year==2014", which kept any year after 2014
// in this part as well, so those rows were appended twice. The two parts now
// form an exact partition of the intermediate file.
use "[Folder]\Firm_Labor_Occ_Intermediate.dta", clear
drop if year>=2014
merge m:1 KU1Ssyk4 using "[Data Folder]\ssyk96_isco08_up.dta", keepusing(iti nr_ca nr_ci rc rm nr_mp)
drop if _merge==2
drop _merge

append using "[Folder]\Firm_Labor_Occ_nr.dta"

// Child migrants: keep their original birth-country code in
// FodelseLandnamn_Org and pool them under code 130.
generate FodelseLandnamn_Org = FodelseLandnamn if ChildMig==1
replace FodelseLandnamn = 130 if ChildMig==1

// Occupation score built from the crosswalk measures.
generate nr = nr_mp + nr_ca - rm - rc

// Quartiles of nr, computed among rows with Kon==0 only; missing elsewhere.
xtile nr_quart = nr if Kon==0, nq(4)

// OccGroup: 2 = Kon==0 in the top two nr quartiles,
//           1 = remaining rows with Kon==0,
//           0 = all other rows.
generate OccGroup = cond(inlist(nr_quart, 3, 4), 2, cond(Kon==0, 1, 0))


save "[Folder]\Firm_Labor_Occ_nr.dta", replace


// Weight efficiency units by the relative hours measure, then reduce the
// data to the variables needed for the firm-level aggregation.
replace EffUnits = EffUnits*tjomf_y_av

keep LopNr_PeOrgNr year FR_Incl FirmSize KU1Ink EffUnits u tjomf_y_av ChildMig FodelseLandnamn Kon OccGroup
sort LopNr_PeOrgNr year FodelseLandnamn Kon


// One row per firm x year x birth country x occupation group:
// summed income and efficiency units, number of rows with income
// (count_KU1), and the cell maximum of FirmSize and u.
collapse (sum) KU1Ink EffUnits ///
	(count) count_KU1=KU1Ink ///
	(max) FirmSize=FirmSize u=u, ///
	by(LopNr_PeOrgNr year FodelseLandnamn OccGroup)


// Spread the cells into one variable per country code (1-130) and group, so
// that the next collapse yields a single row per firm-year:
//   EU_<grp>_<c>    efficiency units
//   count_<grp>_<c> head count
// where <grp> is M1 (OccGroup 1), M2 (OccGroup 2) or F (OccGroup 0).
// Each variable is 0 except on the row of its own country/group cell.
local grpTags "M1 M2 F"
local grpCodes "1 2 0"
forvalues c = 1/130 {
	// Efficiency units first, then counts (keeps the variable order stable).
	foreach src in EffUnits count_KU1 {
		local stub = cond("`src'"=="EffUnits", "EU", "count")
		forvalues k = 1/3 {
			local tag : word `k' of `grpTags'
			local code : word `k' of `grpCodes'
			generate `stub'_`tag'_`c' = cond(FodelseLandnamn==`c' & OccGroup==`code', `src', 0)
		}
	}
}


// count_KU1 must go before the wildcard collapse so that count* only picks
// up the per-country count variables.
drop count_KU1
// At most one row per firm-year carries a non-zero value in each variable,
// so taking the maximum recovers that value.
collapse (max) count* EU_* FirmSize, by(LopNr_PeOrgNr year)


// Firm-year totals across all country codes.
egen totcount = rowtotal(count_M* count_F*)
egen totEU = rowtotal(EU_M* EU_F*)
egen M_Count = rowtotal(count_M*)
egen F_Count = rowtotal(count_F*)

// Bring in the full set of firm-panel variables. The file saved here still
// contains unmatched rows from both sides together with _merge.
merge 1:1 LopNr_PeOrgNr year using "[Folder]\Panel_Firm.dta"

save "[Folder]\Panel_Firm_Lab_OccSplit.dta", replace


// From here on only firm-years present in both sources are kept.
keep if _merge==3


generate logFtg_Foradlingsvarde = log(Ftg_Foradlingsvarde)

// Concentration index over country codes: the sum of squared head-count
// shares. rowtotal() treats missing shares (totcount of 0) as 0.
forvalues c = 1/130 {
	quietly generate ts_sq_`c' = ((count_F_`c' + count_M1_`c' + count_M2_`c')/totcount)^2
}

egen HIndex_T = rowtotal(ts_sq_*)
drop ts_sq_*


// Investment: level, log and powers of the log.
rename Ftg_Summa_bruttoinv GrosInvTot
generate logGrosInvTot = log(GrosInvTot)
generate logGrosInvTot_Sq = logGrosInvTot^2
generate logGrosInvTot_Qu = logGrosInvTot^3


// Capital: level, log and powers of the log.
generate Cap = Ftg_Anlaggningstillgangar
generate logCap = log(Cap)
generate logCap_Sq = logCap^2
generate logCap_Qu = logCap^3

// Location fixed effects for the largest Org_BelKommun units.
// k_Size is the total head count per Org_BelKommun over all rows, and k_id
// ranks the distinct k_Size values in ascending order.
bysort Org_BelKommun: egen k_Size = sum(totcount)
sort k_Size
egen k_id = group(k_Size)
summarize k_id
scalar maxid = r(max)
// Dummies are created for units in the top 42 size ranks. Per the original
// author's note, 42 was chosen to cover all areas with more than 50k
// inhabitants (the smallest area with a dummy has around 27k inhabitants).
tabulate Org_BelKommun if k_id>maxid-42, generate(k_)
// Rows outside the top 42 get 0 in every dummy rather than missing.
recode k_* (.=0)


// Restrict to firms with at least 6 employees. The variable name is written
// out in full: the former abbreviation FirmSiz only resolved through
// variable abbreviation, which fails with "set varabbrev off" and becomes
// ambiguous as soon as the FirmSize_* variables below exist.
drop if FirmSize<6

// Firm-size class indicators (0/1). With an integer head count the classes
// are 6-15, 16-50, 51-150, 151-500 and 501+.
generate FirmSize_1 = (FirmSize<16 & !missing(FirmSize))
generate FirmSize_2 = (FirmSize>15 & FirmSize<51 & !missing(FirmSize))
generate FirmSize_3 = (FirmSize>50 & FirmSize<151 & !missing(FirmSize))
generate FirmSize_4 = (FirmSize>150 & FirmSize<501 & !missing(FirmSize))
generate FirmSize_5 = (FirmSize>500 & !missing(FirmSize))


// Year dummies, and u = number of missing values among the year dummies,
// size classes, industry code and the three log variables.
tabulate year, generate(y_)
egen u = rowmiss(y_* FirmSize_* Org_Sni2007G logFtg_Foradlingsvarde logCap logGrosInvTot)


// Industry dummies: drop the first character of Org_Sni2007G, convert the
// remainder to a number and create one indicator (Ind1, Ind2, ...) per value.
generate Ind = substr(Org_Sni2007G, 2, .)
destring Ind, replace
tabulate Ind, generate(Ind)


sort LopNr_PeOrgNr year

// Labor input of the F group: country code 107 on its own (EU_FNat) and all
// other country codes summed (EU_FFor). EU_F_107 is dropped before the
// row total so that it is not counted in EU_FFor.
generate EU_FNat = EU_F_107
drop EU_F_107
egen EU_FFor = rowtotal(EU_F_*)


drop EU_F_*

// Labor-input variables for the M1 and M2 groups, one per country code.
forvalues c = 1/130 {
	foreach grp in M1 M2 {
		generate lab_`grp'_`c' = EU_`grp'_`c'
	}
}


generate lab_FNat = EU_FNat
generate lab_FFor = EU_FFor


save "[Folder]\Panel_Firm_Lab_OccSplit_temp.dta", replace


